5 Host genomics

5.1 Host DNA fraction

5.1.1 Data overview

# Mean and standard deviation of the host percentage for each sample type.
# host_percentage expresses host_bases as a percentage of bases_post_fastp.
read_tsv("data/preprocessing.tsv") %>%
    left_join(read_tsv("data/sample.tsv"), by = "sample_id") %>%
    mutate(host_percentage = host_bases / bases_post_fastp * 100) %>%
    # Only the two sample types compared in this section are kept.
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    group_by(sample_type) %>%
    summarise(
        # Spell out TRUE: the shorthand T is an ordinary variable that can be
        # reassigned, which would silently change the NA handling.
        mean = mean(host_percentage, na.rm = TRUE),
        sd = sd(host_percentage, na.rm = TRUE)
    ) %>%
    tt()
tinytable_b5yredxxlx0y3jgq5hqc
sample_type mean sd
Anal/cloacal swab 75.76413 32.05899
Faecal 22.94532 32.09473
# Mean and standard deviation of the host percentage for each taxonomic group.
# host_percentage expresses host_bases as a percentage of bases_post_fastp.
read_tsv("data/preprocessing.tsv") %>%
    left_join(read_tsv("data/sample.tsv"), by = "sample_id") %>%
    mutate(host_percentage = host_bases / bases_post_fastp * 100) %>%
    # Only the two sample types compared in this section are kept.
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    group_by(tax_group) %>%
    summarise(
        # Spell out TRUE: the shorthand T is an ordinary variable that can be
        # reassigned, which would silently change the NA handling.
        mean = mean(host_percentage, na.rm = TRUE),
        sd = sd(host_percentage, na.rm = TRUE)
    ) %>%
    tt()
tinytable_c5yovke33sjzsb3c7k6k
tax_group mean sd
Amphibians 0.2968512 1.367301
Bats 49.4004705 37.779131
Birds 58.5039633 38.603463
Mammals 29.5029544 36.464343
Reptiles 12.4971018 22.008872

5.1.2 Statistical test

# ANOVA table for a linear model of host percentage on sample type and
# taxonomic group, returned as a tidy tibble.
# NOTE: anova() on a single lm fit reports sequential sums of squares, so each
# term is assessed after the terms that precede it in the formula.
read_tsv("data/preprocessing.tsv") %>%
    left_join(read_tsv("data/sample.tsv"), by = "sample_id") %>%
    mutate(host_percentage = host_bases / bases_post_fastp * 100) %>%
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    # The explicit `data = .` places the piped table in the data argument
    # instead of the first (formula) argument.
    lm(
        formula = host_percentage ~ sample_type + tax_group,
        data = .
    ) %>%
    anova() %>%
    tidy()
# A tibble: 3 × 6
  term           df    sumsq  meansq statistic    p.value
  <chr>       <int>    <dbl>   <dbl>     <dbl>      <dbl>
1 sample_type     1  798510. 798510.      991.  2.27e-177
2 tax_group       4  456840. 114210.      142.  6.34e-107
3 Residuals    2019 1626206.    805.       NA  NA        

5.1.3 Plot

# Host percentage per taxonomic group, one facet per sample type: jittered raw
# points with a box plot drawn on top. The figure is displayed and then written
# to figures/hostdata_taxa.pdf.
host_percentage_taxa_plot <- read_tsv("data/preprocessing.tsv") %>%
    left_join(read_tsv("data/sample.tsv"), by = "sample_id") %>%
    # host_bases as a percentage of bases_post_fastp
    mutate(host_percentage = host_bases / bases_post_fastp * 100) %>%
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    # Fix the group order; the unnamed manual colour vectors below are matched
    # to the factor levels in this order.
    mutate(tax_group = factor(
        tax_group,
        levels = c("Amphibians", "Reptiles", "Birds", "Bats", "Mammals")
    )) %>%
    ggplot(aes(
        y = host_percentage,
        x = tax_group,
        color = tax_group,
        fill = tax_group,
        group = tax_group
    )) +
        geom_jitter(alpha = 0.2, width = 0.3) +
        # Outlier markers are suppressed because every observation is already
        # drawn by the jitter layer.
        geom_boxplot(outlier.shape = NA) +
        scale_color_manual(values = c("#228833", "#EE6677", "#CCBB44", "#66CCEE", "#4477AA")) +
        # Same colours as above with an alpha channel of 0x80 appended.
        scale_fill_manual(values = c("#22883380", "#EE667780", "#CCBB4480", "#66CCEE80", "#4477AA80")) +
        facet_grid(~sample_type) +
        labs(y = "Host percentage", color = "Taxa", fill = "Taxa") +
        theme_classic()

# Display the figure in the rendered document.
host_percentage_taxa_plot

# Pass the plot explicitly instead of relying on the last plot displayed.
ggsave("figures/hostdata_taxa.pdf", plot = host_percentage_taxa_plot, width = 9, height = 4, units = "in")
# Distribution of the host percentage for each sample type, drawn as half-eye
# density slabs. The figure is displayed and then written to
# figures/hostdata_taxa_all.pdf.
host_percentage_halfeye_plot <- read_tsv("data/preprocessing.tsv") %>%
    left_join(read_tsv("data/sample.tsv"), by = "sample_id") %>%
    # host_bases as a percentage of bases_post_fastp
    mutate(host_percentage = host_bases / bases_post_fastp * 100) %>%
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    # tax_group is not mapped in this figure, so it is neither releveled nor
    # given legend titles here.
    ggplot(aes(y = host_percentage, x = sample_type, group = sample_type)) +
        stat_halfeye(
            adjust = 1,
            width = 0.5,
            .width = 0,
            justification = 0,
            normalize = "groups"
        ) +
        labs(y = "Host percentage") +
        theme_classic()

# Display the figure in the rendered document.
host_percentage_halfeye_plot

# Pass the plot explicitly instead of relying on the last plot displayed.
ggsave("figures/hostdata_taxa_all.pdf", plot = host_percentage_halfeye_plot, width = 9, height = 4, units = "in")

5.2 Genome depth

5.2.1 Data overview

# Mean and standard deviation of the host genome depth for each sample type.
read_tsv("data/preprocessing.tsv") %>%
    left_join(read_tsv("data/sample.tsv"), by = "sample_id") %>%
    left_join(read_tsv("data/reference.tsv"), by = "reference_id") %>%
    # depth = host bases divided by the reference genome size in bases.
    # NOTE(review): the 1e6 factor suggests reference_size is stored in
    # megabases; confirm against data/reference.tsv.
    mutate(depth = host_bases / (reference_size * 1000000)) %>%
    # Only the two sample types compared in this section are kept.
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    group_by(sample_type) %>%
    summarise(
        # Spell out TRUE: the shorthand T is an ordinary variable that can be
        # reassigned, which would silently change the NA handling.
        mean = mean(depth, na.rm = TRUE),
        sd = sd(depth, na.rm = TRUE)
    ) %>%
    tt()
tinytable_3dy9w0cfmk79qi9xaaa8
sample_type mean sd
Anal/cloacal swab 2.4220586 2.079382
Faecal 0.6573435 1.628490
# Mean and standard deviation of the host genome depth for each taxonomic group.
read_tsv("data/preprocessing.tsv") %>%
    left_join(read_tsv("data/sample.tsv"), by = "sample_id") %>%
    left_join(read_tsv("data/reference.tsv"), by = "reference_id") %>%
    # depth = host bases divided by the reference genome size in bases.
    # NOTE(review): the 1e6 factor suggests reference_size is stored in
    # megabases; confirm against data/reference.tsv.
    mutate(depth = host_bases / (reference_size * 1000000)) %>%
    # Only the two sample types compared in this section are kept.
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    group_by(tax_group) %>%
    summarise(
        # Spell out TRUE: the shorthand T is an ordinary variable that can be
        # reassigned, which would silently change the NA handling.
        mean = mean(depth, na.rm = TRUE),
        sd = sd(depth, na.rm = TRUE)
    ) %>%
    tt()
tinytable_d306mdmo55dvh49ccut4
tax_group mean sd
Amphibians 0.001921062 0.01314311
Bats 1.246383229 1.39021316
Birds 2.478493853 3.08305053
Mammals 0.595230910 1.27474674
Reptiles 0.345241814 0.60682463

5.2.2 Statistical test

# ANOVA table for a linear model of host genome depth on sample type and
# taxonomic group, returned as a tidy tibble.
# NOTE: anova() on a single lm fit reports sequential sums of squares, so each
# term is assessed after the terms that precede it in the formula.
read_tsv("data/preprocessing.tsv") %>%
    left_join(read_tsv("data/sample.tsv"), by = "sample_id") %>%
    left_join(read_tsv("data/reference.tsv"), by = "reference_id") %>%
    # depth = host bases divided by (reference_size * 1e6)
    mutate(depth = host_bases / (reference_size * 1000000)) %>%
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    # The explicit `data = .` places the piped table in the data argument
    # instead of the first (formula) argument.
    lm(
        formula = depth ~ sample_type + tax_group,
        data = .
    ) %>%
    anova() %>%
    tidy()
# A tibble: 3 × 6
  term           df sumsq meansq statistic   p.value
  <chr>       <int> <dbl>  <dbl>     <dbl>     <dbl>
1 sample_type     1  893. 893.       349.   6.07e-72
2 tax_group       4  772. 193.        75.3  1.28e-59
3 Residuals    2035 5211.   2.56      NA   NA       

5.2.3 Plot

# Box plot of host genome depth for each sample type, with the y axis zoomed to
# 0-10. The figure is displayed and then written to figures/hostdepth_taxa.pdf.
host_depth_plot <- read_tsv("data/preprocessing.tsv") %>%
    left_join(read_tsv("data/sample.tsv"), by = "sample_id") %>%
    left_join(read_tsv("data/reference.tsv"), by = "reference_id") %>%
    # depth = host bases divided by the reference genome size in bases.
    # NOTE(review): the 1e6 factor suggests reference_size is stored in
    # megabases; confirm against data/reference.tsv.
    mutate(depth = host_bases / (reference_size * 1000000)) %>%
    filter(sample_type %in% c("Faecal", "Anal/cloacal swab")) %>%
    # tax_group is not mapped in this figure, so it is not releveled here.
    ggplot(aes(
        y = depth,
        x = sample_type,
        color = sample_type,
        fill = sample_type,
        group = sample_type
    )) +
        geom_boxplot(outlier.shape = NA) +
        scale_color_manual(values = c("#bdca50", "#AA3377")) +
        # Same colours as above with an alpha channel of 0x80 appended.
        scale_fill_manual(values = c("#bdca5080", "#AA337780")) +
        # Zoom with coord_cartesian() rather than ylim(): scale limits drop
        # observations above 10 before the box statistics are computed, which
        # would bias the medians and quartiles shown.
        coord_cartesian(ylim = c(0, 10)) +
        # Colour and fill encode sample_type, so the legend is titled to match.
        labs(y = "Host depth of coverage", color = "Sample type", fill = "Sample type") +
        theme_classic()

# Display the figure in the rendered document.
host_depth_plot

# Pass the plot explicitly instead of relying on the last plot displayed.
ggsave("figures/hostdepth_taxa.pdf", plot = host_depth_plot, width = 5, height = 4, units = "in")